# R script for statistical analysis for "HOLEBOARD STUDY"
# statistical analysis was done by Jillian Hendricks

# Install ggplot2 and patchwork only when they are missing, then attach them.
# Unconditional install.packages() re-downloads on every run, and detach()
# errors when the package is not currently attached, so neither is used here.
for (pkg in c("ggplot2", "patchwork")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(ggplot2)
library(patchwork)

# Show where the packages were loaded from (useful for debugging library paths)
find.package("ggplot2")
find.package("patchwork")

# load packages

library(lme4)
library(lmerTest)
library(Matrix)
library(tidyverse)
library(tidyr)
library(dplyr)
library(ggplot2)
library(patchwork)
library(car)
library(lmerPerm)
library(nlme)
library(pgirmess)
library(MuMIn)
library(ordinal)


# Read the master dataset from CSV.
# NOTE(review): this is an absolute, user-specific path; the script will only
# run unchanged on the original machine.
holeboard_csv <- "C:/Users/ou22669/OneDrive - University of Bristol/University of Bristol/Holeboard/Datasets/Holeboard project_DatasetforR.csv"
holeboard_masterdata <- read.csv(file = holeboard_csv)

# Quick visual / numeric sanity check of the imported data
view(holeboard_masterdata)
summary(holeboard_masterdata)

# Convert Session to numeric.
# as.numeric() has no `levels` argument (it was silently ignored), and calling
# it directly on a factor returns the internal level codes rather than the
# labels. Going through as.character() yields the recorded session number
# whether the column was read in as numeric, character or factor.
holeboard_masterdata$Session <- as.numeric(as.character(holeboard_masterdata$Session))

# Mean and standard error (SE) of milk intake for each treatment group.
# Intake is first averaged within calf so that each calf contributes a single
# value; the group mean and SE are then computed across calves.
holeboard_masterdata$Treatment <- as.factor(holeboard_masterdata$Treatment)

# Enhanced ("E") group
mean_SE_milk_E <- holeboard_masterdata %>%
  filter(Treatment == "E") %>%
  group_by(Calf_ID) %>%
  summarise(mean_milk = mean(Milk_prev, na.rm = TRUE), .groups = "drop") %>%
  summarise(
    Milk_prev = mean(mean_milk, na.rm = TRUE),
    # sd / sqrt(n) is a standard error, so the column is named `se` (it was
    # previously labelled `sd`); n counts only calves with a usable mean
    se = sd(mean_milk, na.rm = TRUE) / sqrt(sum(!is.na(mean_milk)))
  )
print(mean_SE_milk_E)

# Restricted ("R") group
mean_SE_milk_R <- holeboard_masterdata %>%
  filter(Treatment == "R") %>%
  group_by(Calf_ID) %>%
  summarise(mean_milk = mean(Milk_prev, na.rm = TRUE), .groups = "drop") %>%
  summarise(
    Milk_prev = mean(mean_milk, na.rm = TRUE),
    se = sd(mean_milk, na.rm = TRUE) / sqrt(sum(!is.na(mean_milk)))
  )
print(mean_SE_milk_R)


## Power analysis using data from the Lecorps study (hunger affects cognitive
## performance of dairy calves, experiment 2)
# n = 21
cohen_d <- sqrt(6.62 * 18 / 20)
cohen_d

# Sample size per group for a two-sample t-test at 80% power, alpha = 0.05
power_hb <- power.t.test(
  delta = cohen_d,
  sig.level = 0.05,
  power = 0.80,
  type = "two.sample"
)
power_hb

# Alternative route: partial eta-squared -> Cohen's f -> Cohen's d
# partial eta-squared
eta2_p <- 6.62 / (6.62 + 20)
# Cohen's f from partial eta-squared
sqrt(eta2_p / (1 - eta2_p))
# 0.575326
# Cohen's d from Cohen's f
2 * 0.575326
# 1.150652

# plug Cohen's d into power.t.test (delta = d); n is left unset so it is solved for
power.t.test(
  delta = 1.15,
  sig.level = 0.05,
  power = 0.8,
  type = "two.sample"
)
# n = 12.9 per treatment
# NOTE: Cohen's d from Ben's study is quite large (> 1), so the sample size
# needed to detect this effect is smaller


#### GRAPHS ####

# no. of visits to buckets - excluded from paper
# Mean and SE of bucket visits for each Treatment x Session cell
mean_SE_visits <- holeboard_masterdata %>%
  group_by(Treatment, Session) %>%
  summarise(
    mean_visits = mean(Visits, na.rm = TRUE),
    # divide by the number of non-missing observations: n() would also count
    # NA rows that sd(na.rm = TRUE) has already discarded
    se = sd(Visits, na.rm = TRUE) / sqrt(sum(!is.na(Visits))),
    .groups = "drop"
  )

# Line plot of mean visits per trial with +/- 1 SE bars; dashed line at trial 15
visits_plot <- ggplot(mean_SE_visits, aes(x = Session, y = mean_visits, color = Treatment)) +
  geom_line(linewidth = 1.2) + # `size` is deprecated for lines since ggplot2 3.4.0
  geom_point(size = 2.2) +
  geom_errorbar(aes(ymin = mean_visits - se, ymax = mean_visits + se), width = 0.2) +
  labs(x = "Trial", y = "Number of Visits", color = "Treatment") +
  scale_x_continuous(breaks = seq(1, 20, by = 1)) +
  scale_y_continuous(limits = c(0, 60)) +
  geom_vline(xintercept = 15, linetype = "dashed", color = "black", linewidth = 0.5) +
  theme_classic() +
  scale_color_manual(values = c("E" = "#00BFC4", "R" = "#F8766D")) +
  theme(
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 10),
    axis.text.x = element_text(size = 10)
  ) +
  guides(color = "none")
visits_plot

# number of rewarded buckets found
# Mean and SE of Success for each Treatment x Session cell
mean_SE_success <- holeboard_masterdata %>%
  group_by(Treatment, Session) %>%
  summarise(
    mean_success = mean(Success, na.rm = TRUE),
    # SE denominator counts only non-missing observations
    se = sd(Success, na.rm = TRUE) / sqrt(sum(!is.na(Success))),
    .groups = "drop"
  )

# Line plot with +/- 1 SE bars; legend suppressed; dashed line at trial 15
success_plot <- ggplot(mean_SE_success, aes(x = Session, y = mean_success, color = Treatment, shape = Treatment)) +
  geom_line(linewidth = 1.2) + # `size` is deprecated for lines since ggplot2 3.4.0
  geom_point(size = 3.2) +
  geom_errorbar(aes(ymin = mean_success - se, ymax = mean_success + se), width = 0.2) +
  labs(x = "Trial", y = "Number of Baited Buckets Found", color = "Treatment") +
  scale_x_continuous(breaks = seq(1, 20, by = 1)) +
  scale_y_continuous(limits = c(0, 5), labels = c("0", "1", "2", "3", "4"), breaks = c(0, 1, 2, 3, 4)) +
  geom_vline(xintercept = 15, linetype = "dashed", color = "black", linewidth = 0.5) +
  theme_classic() +
  scale_color_manual(values = c("E" = "#00BFC4", "R" = "#F8766D")) +
  scale_shape_manual(values = c("E" = 16, "R" = 17)) +
  theme(
    axis.title = element_text(size = 18),
    axis.text = element_text(size = 16)
  ) +
  guides(color = "none", shape = "none")
success_plot


# vocalizations
# Mean and SE of vocalisation counts for each Treatment x Session cell
mean_SE_voc <- holeboard_masterdata %>%
  group_by(Treatment, Session) %>%
  summarise(
    mean_voc = mean(Voc, na.rm = TRUE),
    # SE denominator counts only non-missing observations
    se = sd(Voc, na.rm = TRUE) / sqrt(sum(!is.na(Voc))),
    .groups = "drop"
  )
view(mean_SE_voc)

# Line plot with +/- 1 SE bars; this panel keeps the legend
# (Enhanced / Restricted); dashed line at trial 15
voc_plot <- ggplot(mean_SE_voc, aes(x = Session, y = mean_voc, color = Treatment, shape = Treatment)) +
  geom_line(linewidth = 1.2) + # `size` is deprecated for lines since ggplot2 3.4.0
  geom_point(size = 3.2) +
  geom_errorbar(aes(ymin = mean_voc - se, ymax = mean_voc + se), width = 0.2) +
  labs(x = "Trial", y = "Number of Vocalisations", color = "Treatment", shape = "Treatment") +
  scale_x_continuous(breaks = seq(1, 20, by = 1)) +
  scale_y_continuous(limits = c(0, 5), labels = c("0", "1", "2", "3", "4"), breaks = c(0, 1, 2, 3, 4)) +
  geom_vline(xintercept = 15, linetype = "dashed", color = "black", linewidth = 0.5) +
  theme_classic() +
  scale_color_manual(values = c("E" = "#00BFC4", "R" = "#F8766D"), labels = c("E" = "Enhanced", "R" = "Restricted")) +
  scale_shape_manual(values = c("E" = 16, "R" = 17), labels = c("E" = "Enhanced", "R" = "Restricted")) +
  theme(
    legend.text = element_text(size = 22),
    legend.title = element_text(size = 24),
    axis.title = element_text(size = 18),
    axis.text = element_text(size = 16)
  )
voc_plot


# trial duration
# Mean and SE of trial duration for each Treatment x Session cell
mean_SE_trial <- holeboard_masterdata %>%
  group_by(Treatment, Session) %>%
  summarise(
    mean_trial = mean(Trial_duration, na.rm = TRUE),
    # SE denominator counts only non-missing observations
    se = sd(Trial_duration, na.rm = TRUE) / sqrt(sum(!is.na(Trial_duration))),
    .groups = "drop"
  )
view(mean_SE_trial)

# Line plot with +/- 1 SE bars; legend suppressed; dashed line at trial 15
trialduration_plot <-
  ggplot(mean_SE_trial, aes(x = Session, y = mean_trial, color = Treatment, shape = Treatment)) +
  geom_line(linewidth = 1.2) + # `size` is deprecated for lines since ggplot2 3.4.0
  geom_point(size = 3.2) +
  geom_errorbar(aes(ymin = mean_trial - se, ymax = mean_trial + se), width = 0.2) +
  labs(x = "Trial", y = "Trial Duration (s)", color = "Treatment") +
  scale_x_continuous(breaks = seq(1, 20, by = 1)) +
  scale_y_continuous(limits = c(0, 350)) +
  geom_vline(xintercept = 15, linetype = "dashed", color = "black", linewidth = 0.5) +
  theme_classic() +
  scale_color_manual(values = c("E" = "#00BFC4", "R" = "#F8766D")) +
  scale_shape_manual(values = c("E" = 16, "R" = 17)) +
  theme(
    axis.title = element_text(size = 18),
    axis.text = element_text(size = 16)
  ) +
  guides(color = "none", shape = "none")
trialduration_plot

# latency to first bucket
# Mean and SE of latency for each Treatment x Session cell
mean_SE_lat <- holeboard_masterdata %>%
  group_by(Treatment, Session) %>%
  summarise(
    mean_lat = mean(Latency, na.rm = TRUE),
    # SE denominator counts only non-missing observations
    se = sd(Latency, na.rm = TRUE) / sqrt(sum(!is.na(Latency))),
    .groups = "drop"
  )
view(mean_SE_lat)

# Line plot with +/- 1 SE bars; legend suppressed; dashed line at trial 15
latency_plot <-
  ggplot(mean_SE_lat, aes(x = Session, y = mean_lat, color = Treatment, shape = Treatment)) +
  geom_line(linewidth = 1.2) + # `size` is deprecated for lines since ggplot2 3.4.0
  geom_point(size = 3.2) +
  geom_errorbar(aes(ymin = mean_lat - se, ymax = mean_lat + se), width = 0.2) +
  labs(x = "Trial", y = "Latency (s)", color = "Treatment") +
  scale_x_continuous(breaks = seq(1, 20, by = 1)) +
  scale_y_continuous(limits = c(0, 20), labels = c("0", "5", "10", "15"), breaks = c(0, 5, 10, 15)) +
  geom_vline(xintercept = 15, linetype = "dashed", color = "black", linewidth = 0.5) +
  theme_classic() +
  scale_color_manual(values = c("E" = "#00BFC4", "R" = "#F8766D")) +
  scale_shape_manual(values = c("E" = 16, "R" = 17)) +
  theme(
    axis.title = element_text(size = 18),
    axis.text = element_text(size = 16)
  ) +
  guides(color = "none", shape = "none")
latency_plot

## 2 x 2 panel: success and trial duration on top, latency and vocalisations
## below (the visits plot is not included)

top_row <- success_plot | trialduration_plot
bottom_row <- latency_plot | voc_plot

# Preview a few equivalent layouts interactively
(top_row / bottom_row)

(success_plot + trialduration_plot) / (latency_plot + voc_plot)

(top_row / bottom_row) +
  plot_layout(widths = c(1, 1), heights = c(1, 1))

(top_row / bottom_row) + plot_layout(widths = c(1, 1))

# Write the panel to the current working directory at 300 dpi
ggsave(
  filename = "performance_panel_highres_novisits_3.jpg",
  plot = top_row / bottom_row,
  dpi = 300,
  width = 18,
  height = 13
)


# working memory with success rate
# Mean and SE of WMwSR for each Treatment x Session cell
mean_SE_WMS <- holeboard_masterdata %>%
  group_by(Treatment, Session) %>%
  summarise(
    mean_WMS = mean(WMwSR, na.rm = TRUE),
    # SE denominator counts only non-missing observations
    se = sd(WMwSR, na.rm = TRUE) / sqrt(sum(!is.na(WMwSR))),
    .groups = "drop"
  )

# Line plot with +/- 1 SE bars; legend suppressed; dashed line at trial 15
WM_plot <- ggplot(mean_SE_WMS, aes(x = Session, y = mean_WMS, color = Treatment, shape = Treatment)) +
  geom_line(linewidth = 1.2) + # `size` is deprecated for lines since ggplot2 3.4.0
  geom_point(size = 3.2) +
  geom_errorbar(aes(ymin = mean_WMS - se, ymax = mean_WMS + se), width = 0.2) +
  labs(x = "Trial", y = "Working Memory", color = "Treatment") +
  scale_x_continuous(breaks = seq(1, 20, by = 1)) +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.2)) +
  geom_vline(xintercept = 15, linetype = "dashed", color = "black", linewidth = 0.5) +
  theme_classic() +
  scale_color_manual(values = c("E" = "#00BFC4", "R" = "#F8766D")) +
  scale_shape_manual(values = c("E" = 16, "R" = 17)) +
  theme(
    axis.title = element_text(size = 18),
    axis.text = element_text(size = 14)
  ) +
  guides(color = "none", shape = "none")
WM_plot


# Working memory without success rate: line + point plot of working memory
# score by session, coloured by Treatment.
# NOTE(review): `Holeboard_project_DatasetforR` and the column `mean_WM` are
# not created anywhere in the visible part of this script, so this block looks
# like a leftover from an earlier version and will likely error if run as-is.
# NOTE(review): the colour legend is titled "Calf ID" although colour is mapped
# to Treatment - confirm which was intended.
ggplot(Holeboard_project_DatasetforR, 
       aes(x = Session, y = mean_WM, color = Treatment)) +  
  geom_line() +
  geom_point() +
  labs(x = "Session", y = "Working Memory Score") +
  scale_color_discrete(name = "Calf ID") +
  scale_x_continuous(breaks = unique(Holeboard_project_DatasetforR$Session)) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.2),  
                     limits = c(0, 1), 
                     labels = c("0", "0.2", "0.4", "0.6", "0.8", "1")) + 
  theme_classic()

# general working memory
# Mean and SE of GWM for each Treatment x Session cell
mean_SE_GWM <- holeboard_masterdata %>%
  group_by(Treatment, Session) %>%
  summarise(
    mean_GWM = mean(GWM, na.rm = TRUE),
    # SE denominator counts only non-missing observations
    se = sd(GWM, na.rm = TRUE) / sqrt(sum(!is.na(GWM))),
    .groups = "drop"
  )

# Line plot with +/- 1 SE bars; legend suppressed; dashed line at trial 15
GWM_plot <- ggplot(mean_SE_GWM, aes(x = Session, y = mean_GWM, color = Treatment, shape = Treatment)) +
  geom_line(linewidth = 1.2) + # `size` is deprecated for lines since ggplot2 3.4.0
  geom_point(size = 3.2) +
  geom_errorbar(aes(ymin = mean_GWM - se, ymax = mean_GWM + se), width = 0.2) +
  labs(x = "Trial", y = "General Working Memory", color = "Treatment") +
  scale_x_continuous(breaks = seq(1, 20, by = 1)) +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.2)) +
  geom_vline(xintercept = 15, linetype = "dashed", color = "black", linewidth = 0.5) +
  theme_classic() +
  scale_color_manual(values = c("E" = "#00BFC4", "R" = "#F8766D")) +
  scale_shape_manual(values = c("E" = 16, "R" = 17)) +
  theme(
    axis.title = element_text(size = 18),
    axis.text = element_text(size = 14)
  ) +
  guides(color = "none", shape = "none")
GWM_plot


# reference memory with success rate
# Mean and SE of RMwSR for each Treatment x Session cell
mean_SE_RMS <- holeboard_masterdata %>%
  group_by(Treatment, Session) %>%
  summarise(
    mean_RMS = mean(RMwSR, na.rm = TRUE),
    # SE denominator counts only non-missing observations
    se = sd(RMwSR, na.rm = TRUE) / sqrt(sum(!is.na(RMwSR))),
    .groups = "drop"
  )

# Line plot with +/- 1 SE bars; this panel keeps the legend
# (Enhanced / Restricted); dashed line at trial 15
RM_plot <- ggplot(mean_SE_RMS, aes(x = Session, y = mean_RMS, color = Treatment, shape = Treatment)) +
  geom_line(linewidth = 1.2) + # `size` is deprecated for lines since ggplot2 3.4.0
  geom_point(size = 3.2) +
  geom_errorbar(aes(ymin = mean_RMS - se, ymax = mean_RMS + se), width = 0.2) +
  labs(x = "Trial", y = "Reference Memory", color = "Treatment") +
  scale_x_continuous(breaks = seq(1, 20, by = 1)) +
  scale_y_continuous(limits = c(0, 1.1), breaks = seq(0, 1, by = 0.2)) +
  geom_vline(xintercept = 15, linetype = "dashed", color = "black", linewidth = 0.5) +
  scale_color_manual(values = c("E" = "#00BFC4", "R" = "#F8766D"), labels = c("E" = "Enhanced", "R" = "Restricted")) +
  scale_shape_manual(values = c("E" = 16, "R" = 17), labels = c("E" = "Enhanced", "R" = "Restricted")) +
  theme_classic() +
  theme(
    legend.text = element_text(size = 18),
    legend.title = element_text(size = 20),
    axis.title = element_text(size = 18),
    axis.text = element_text(size = 14)
  )
RM_plot

# Panel of the three memory-score graphs (WM and GWM on top, RM below with an
# empty spacer beside it)

((WM_plot + GWM_plot) / (RM_plot + plot_spacer()))

top_row_ms <- WM_plot + GWM_plot
# the spacer is narrower than the RM panel (width ratio 1 : 0.78)
bottom_row_ms <- RM_plot + plot_spacer() + plot_layout(widths = c(1, 0.78))

(top_row_ms / bottom_row_ms)

# Write the panel to the current working directory at 300 dpi
ggsave(
  filename = "memory_panel_highres_trianglepoints.jpg",
  plot = (top_row_ms / bottom_row_ms),
  dpi = 300,
  width = 13,
  height = 9
)

# show where the file was written
getwd()

# Time spent running
# Mean and SE of Run for each Treatment x Session cell
mean_SE_Run <- holeboard_masterdata %>%
  group_by(Treatment, Session) %>%
  summarise(
    mean_Run = mean(Run, na.rm = TRUE),
    # SE denominator counts only non-missing observations
    se = sd(Run, na.rm = TRUE) / sqrt(sum(!is.na(Run))),
    .groups = "drop"
  )

# Line plot with +/- 1 SE bars; legend suppressed; dashed line at trial 15
run_plot <- ggplot(mean_SE_Run, aes(x = Session, y = mean_Run, color = Treatment, shape = Treatment)) +
  geom_line(linewidth = 1) + # `size` is deprecated for lines since ggplot2 3.4.0
  geom_point(size = 3.2) +
  geom_errorbar(aes(ymin = mean_Run - se, ymax = mean_Run + se), width = 0.2) +
  labs(x = "Trial", y = "Time Spent Running", color = "Treatment") +
  scale_x_continuous(breaks = seq(1, 20, by = 1)) +
  scale_y_continuous(limits = c(0, 40)) +
  geom_vline(xintercept = 15, linetype = "dashed", color = "black", linewidth = 0.5) +
  scale_shape_manual(values = c("E" = 16, "R" = 17)) +
  theme_classic() +
  scale_color_manual(values = c("E" = "#00BFC4", "R" = "#F8766D")) +
  guides(color = "none", shape = "none")

run_plot

# Point play
# Mean and SE of Total_freq for each Treatment x Session cell
mean_SE_Total_freq <- holeboard_masterdata %>%
  group_by(Treatment, Session) %>%
  summarise(
    mean_Total_freq = mean(Total_freq, na.rm = TRUE),
    # SE denominator counts only non-missing observations
    se = sd(Total_freq, na.rm = TRUE) / sqrt(sum(!is.na(Total_freq))),
    .groups = "drop"
  )

# Line plot with +/- 1 SE bars; legend kept; dashed line at trial 15
freq_plot <- ggplot(mean_SE_Total_freq, aes(x = Session, y = mean_Total_freq, color = Treatment)) +
  geom_line(linewidth = 1) + # `size` is deprecated for lines since ggplot2 3.4.0
  geom_point(size = 2.2) +
  geom_errorbar(aes(ymin = mean_Total_freq - se, ymax = mean_Total_freq + se), width = 0.2) +
  labs(x = "Trial", y = "Frequency of Point play behaviours", color = "Treatment") +
  scale_x_continuous(breaks = seq(1, 20, by = 1)) +
  scale_y_continuous(limits = c(0, 10), breaks = seq(0, 10, by = 2)) +
  geom_vline(xintercept = 15, linetype = "dashed", color = "black", linewidth = 0.5) +
  theme_classic() +
  scale_color_manual(values = c("E" = "#00BFC4", "R" = "#F8766D"))
freq_plot

# play panel: raw running time next to raw point-play frequency

(run_plot + freq_plot)

# New graphs, play per minute; accounts for variation in trial duration as opposed to raw play
#
# The per-minute columns are derived inside each pipeline (Trial_duration is
# divided by 60 to get minutes) so that this section does not depend on the
# Runpermin / Total_freqpermin columns that the script only adds to
# holeboard_masterdata further down, in the play-behaviour analysis section.

# Time spent running per minute: mean and SE per Treatment x Session cell
mean_SE_Run_min <- holeboard_masterdata %>%
  mutate(Runpermin = Run / (Trial_duration / 60)) %>%
  group_by(Treatment, Session) %>%
  summarise(
    mean_Run_min = mean(Runpermin, na.rm = TRUE),
    # SE denominator counts only non-missing observations
    se = sd(Runpermin, na.rm = TRUE) / sqrt(sum(!is.na(Runpermin))),
    .groups = "drop"
  )

# Line plot with +/- 1 SE bars; legend suppressed; dashed line at trial 15
run_permin_plot <- ggplot(mean_SE_Run_min, aes(x = Session, y = mean_Run_min, color = Treatment, shape = Treatment)) +
  geom_line(linewidth = 1.2) + # `size` is deprecated for lines since ggplot2 3.4.0
  geom_point(size = 3.2) +
  geom_errorbar(aes(ymin = mean_Run_min - se, ymax = mean_Run_min + se), width = 0.2) +
  labs(x = "Trial", y = "Time Spent Running Per Minute (s)", color = "Treatment") +
  scale_x_continuous(breaks = seq(1, 20, by = 1)) +
  scale_y_continuous(limits = c(0, 8), breaks = seq(0, 8, by = 2)) +
  geom_vline(xintercept = 15, linetype = "dashed", color = "black", linewidth = 0.5) +
  theme_classic() +
  scale_color_manual(values = c("E" = "#00BFC4", "R" = "#F8766D")) +
  scale_shape_manual(values = c("E" = 16, "R" = 17)) +
  theme(
    axis.title = element_text(size = 18),
    axis.text = element_text(size = 14)
  ) +
  guides(color = "none", shape = "none")

run_permin_plot

# Point play behaviours per minute: mean and SE per Treatment x Session cell
mean_SE_Total_freq_min <- holeboard_masterdata %>%
  mutate(Total_freqpermin = Total_freq / (Trial_duration / 60)) %>%
  group_by(Treatment, Session) %>%
  summarise(
    mean_Total_freq_min = mean(Total_freqpermin, na.rm = TRUE),
    # SE denominator counts only non-missing observations
    se = sd(Total_freqpermin, na.rm = TRUE) / sqrt(sum(!is.na(Total_freqpermin))),
    .groups = "drop"
  )

# Line plot with +/- 1 SE bars; this panel keeps the legend
# (Enhanced / Restricted); dashed line at trial 15
freq_permin_plot <- ggplot(mean_SE_Total_freq_min, aes(x = Session, y = mean_Total_freq_min, color = Treatment, shape = Treatment)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 3.2) +
  geom_errorbar(aes(ymin = mean_Total_freq_min - se, ymax = mean_Total_freq_min + se), width = 0.2) +
  labs(x = "Trial", y = "Number of Point Play Behaviours Per Minute", color = "Treatment") +
  scale_x_continuous(breaks = seq(1, 20, by = 1)) +
  # breaks restricted to the visible 0-3 range
  scale_y_continuous(limits = c(0, 3), breaks = seq(0, 3, by = 1)) +
  geom_vline(xintercept = 15, linetype = "dashed", color = "black", linewidth = 0.5) +
  theme_classic() +
  scale_color_manual(values = c("E" = "#00BFC4", "R" = "#F8766D"), labels = c("E" = "Enhanced", "R" = "Restricted")) +
  scale_shape_manual(values = c("E" = 16, "R" = 17), labels = c("E" = "Enhanced", "R" = "Restricted")) +
  theme(
    legend.text = element_text(size = 18),
    legend.title = element_text(size = 20),
    axis.title = element_text(size = 18),
    axis.text = element_text(size = 14)
  )
freq_permin_plot

# Side-by-side play panel, previewed and then written at 300 dpi
run_permin_plot + freq_permin_plot

ggsave(
  "play_panel_highres_trianglepoints.jpg",
  (run_permin_plot + freq_permin_plot),
  dpi = 300, width = 14, height = 7
)



#### Statistical analysis ####

## Visual normality check of the three memory scores (histograms)

hist(holeboard_masterdata$GWM, main = "Histogram GWM")
# normal
hist(holeboard_masterdata$WMwSR, main = "Histogram WMwSR")
# mostly normal
hist(holeboard_masterdata$RMwSR, main = "Histogram RMwSR")
# normal

## Mean success per treatment group, plus a t-test comparing the groups

# group means of the raw success count
mean_success <- aggregate(Success ~ Treatment, data = holeboard_masterdata, FUN = mean)
print(mean_success)

# same, using the proportion of rewards found
mean_success_prop <- aggregate(Success_prop ~ Treatment, data = holeboard_masterdata, FUN = mean)
print(mean_success_prop)

# equal-variance two-sample t-test on the proportions
# NOTE(review): every trial row enters as an independent observation here
# (df = 398 in the recorded output below), whereas the mixed models later in
# the script treat Calf_ID as a random effect - confirm this is intended.
t_test_success <- t.test(
  Success_prop ~ Treatment,
  data = holeboard_masterdata,
  var.equal = TRUE
)
print(t_test_success)
# t = -9.1007, df = 398, p-value < 2.2e-16; CI  -0.2872848 -0.1852152; E mean = 0.69125, R mean = 0.92750

# Mean and SD of success in whole numbers, per treatment

mean_success_R <- with(holeboard_masterdata, mean(Success[Treatment == "R"], na.rm = TRUE))
sd_success_R <- with(holeboard_masterdata, sd(Success[Treatment == "R"], na.rm = TRUE))
print(mean_success_R)
print(sd_success_R)
# mean = 3.71

mean_success_E <- with(holeboard_masterdata, mean(Success[Treatment == "E"], na.rm = TRUE))
sd_success_E <- with(holeboard_masterdata, sd(Success[Treatment == "E"], na.rm = TRUE))
print(mean_success_E)
print(sd_success_E)
# mean = 2.76



## model decision-making + post-hoc (used for all LMMs):
# 1. test random slope vs random intercept model for the learning phase - did the AIC favour a random intercept or a random slope model? or were there issues with fitting the model? adjust accordingly
# - only testing slope vs intercept for the learning phase, as it is assumed that any problems would remain for the other 2 phases because the datasets for these are smaller
# - only testing for memory scores
# 2. run model with memory score ~ treatment + session + interaction + (Pair/Calf)
# 3. does the model return singularity? YES -> remove Pair; NO -> keep Pair
# 4. does the model still return singularity with only Calf ID as random effect? YES -> run paired t-tests for both E and R groups
# 5. is the interaction significant? YES -> post-hoc with one model per treatment and interaction included, and remove Pair if not done already; NO -> post-hoc remove interaction and re-run model


#### Memory score analysis ####

## Working memory with success rate (WMwSR)
## Random intercept vs random slope (Session) comparison, learning phase only

# random intercept per calf
WM_L_2 <- lmer(
  WMwSR ~ Treatment * Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)

# random intercept + random Session slope per calf
WM_L_3 <- lmer(
  WMwSR ~ Treatment * Session + (1 + Session | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)

# compare fit - lower AIC = better fit
AIC(WM_L_2, WM_L_3)
# model_2 = 75.5; model_3 = 79.2; difference in scores of 4-7 = less support for higher-AIC model
# slope model has worse fit - AIC favours the intercept model

# --- Initial learning ---

WM_L <- lmer(
  WMwSR ~ Treatment * Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)
# singular fit -> drop the Pair nesting

WM_L_v2 <- lmer(
  WMwSR ~ Treatment * Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)
summary(WM_L_v2)
# Interaction is significant -> post-hoc analysis by treatment.
# Because the post-hoc models are fitted separately per treatment, "Pair" is
# not included as a nested random factor. ** drop the Pair nesting in all
# post-hoc models when the interaction is significant.

WM_L_E <- lmer(
  WMwSR ~ Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L" & Treatment == "E")
)
WM_L_R <- lmer(
  WMwSR ~ Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L" & Treatment == "R")
)
summary(WM_L_E)
summary(WM_L_R)


# --- Changing baited locations (sessions 14 and 15) ---

WM_C <- lmer(
  WMwSR ~ Treatment * Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15))
)
# singular fit -> drop Pair
WM_C_v2 <- lmer(
  WMwSR ~ Treatment * Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15))
)
summary(WM_C_v2)
# interaction not significant -> refit without it
WM_C_v3 <- lmer(
  WMwSR ~ Treatment + Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15))
)
summary(WM_C_v3)


# --- Re-learning phase ---

WM_R <- lmer(
  WMwSR ~ Treatment * Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R")
)
summary(WM_R)
# no singularity, keep Pair nested
# no effect of trial, but restricted calves had lower WM during re-learning (??) t = -2.32, P = 0.022. R calves' scores increased more over time t = 2.83, P = 0.0056
# significant interaction -> post-hoc by treatment; Pair removed
WM_R_R <- lmer(
  WMwSR ~ Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R" & Treatment == "R")
)
summary(WM_R_R)

WM_R_E <- lmer(
  WMwSR ~ Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R" & Treatment == "E")
)
summary(WM_R_E)


## General working memory (GWM)

# Random intercept vs random slope comparison, learning phase only
# random intercept per calf
GWM_L_2 <- lmer(
  GWM ~ Treatment * Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)
# random intercept + random Session slope per calf
GWM_L_3 <- lmer(
  GWM ~ Treatment * Session + (1 + Session | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)
# convergence failure for the slope model; use the intercept model
AIC(GWM_L_2, GWM_L_3)


# --- Learning phase ---
GWM_L <- lmer(
  GWM ~ Treatment * Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)
# no singularity, keep Pair
summary(GWM_L)

# interaction not significant -> refit without it
GWM_L_v2 <- lmer(
  GWM ~ Treatment + Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)
summary(GWM_L_v2)
# GWM increased by session (t = 2.2, P = 0.029); no difference btwn groups (P = 0.29)

# --- Changing baited locations (sessions 14 and 15) ---
GWM_C <- lmer(
  GWM ~ Treatment * Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15))
)
# model convergence failure -> drop Pair
GWM_C_v2 <- lmer(
  GWM ~ Treatment * Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15))
)
summary(GWM_C_v2)
# interaction significant (P = 0.01)

# post-hoc analysis by treatment; Pair removed
GWM_C_R <- lmer(
  GWM ~ Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15) & Treatment == "R")
)
GWM_C_E <- lmer(
  GWM ~ Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15) & Treatment == "E")
)

summary(GWM_C_R)
summary(GWM_C_E)


# --- Re-learning phase ---
GWM_R <- lmer(
  GWM ~ Treatment * Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R")
)
# no singularity, keep Pair
summary(GWM_R)
# no effect of the interaction (P = 0.19) -> refit without it
GWM_R_2 <- lmer(
  GWM ~ Treatment + Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R")
)
summary(GWM_R_2)
# There was an effect of session on GWM (LMM; Ft = 3.82, P < 0.001) but no effect of treatment (LMM; t=-1.63, P=0.12)


## Reference memory with success rate (RMwSR)

# Paired t-test of `response` between sessions 14 and 15 within one treatment.
# Observations are paired explicitly by Calf_ID: rows are sorted by calf and
# the calf IDs of the two sessions must match, otherwise the function stops
# instead of silently pairing values from different calves (which plain
# logical indexing would do whenever the row order differs between sessions).
paired_14_vs_15 <- function(data, response, treatment, alternative) {
  keep <- data$Treatment %in% treatment & data$Session %in% c(14, 15)
  d <- data[keep, , drop = FALSE]
  d <- d[order(d$Calf_ID), , drop = FALSE]
  s14 <- d[d$Session == 14, , drop = FALSE]
  s15 <- d[d$Session == 15, , drop = FALSE]
  stopifnot(identical(as.character(s14$Calf_ID), as.character(s15$Calf_ID)))
  t.test(
    x = s14[[response]],
    y = s15[[response]],
    paired = TRUE,
    alternative = alternative
  )
}

# Random intercept vs random slope comparison, learning phase only
# random intercept per calf
RM_L_2 <- lmer(
  RMwSR ~ Treatment * Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)
# random intercept + random Session slope per calf
RM_L_3 <- lmer(
  RMwSR ~ Treatment * Session + (1 + Session | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)
AIC(RM_L_2, RM_L_3)
# AIC favours slope model slightly

# --- Learning phase ---
RM_L <- lmer(
  RMwSR ~ Treatment * Session + (1 + Session | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)
# singular fit -> drop Pair
RM_L_v2 <- lmer(
  RMwSR ~ Treatment * Session + (1 + Session | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)
summary(RM_L_v2)
# interaction not significant (P = 0.2) -> refit without it
RM_L_v3 <- lmer(
  RMwSR ~ Treatment + Session + (1 + Session | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)
summary(RM_L_v3)
# RM increased by session (t = 7.93, P < 0.001), but no difference between groups (t=1.948, P = 0.067)


# --- Changing baited locations (sessions 14 and 15) ---
RM_C <- lmer(
  RMwSR ~ Treatment * Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15))
)
# singular fit -> drop Pair
RM_C_2 <- lmer(
  RMwSR ~ Treatment * Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15))
)
# still singular -> one-sided paired t-tests (session 14 > session 15) instead

paired_14_vs_15(holeboard_masterdata, "RMwSR", treatment = "R", alternative = "greater")
# recorded result: t = 5.3733, df = 9, p-value = 0.0002242; RM lower on S15 than S14

paired_14_vs_15(holeboard_masterdata, "RMwSR", treatment = "E", alternative = "greater")
# recorded result: t = 3.4237, df = 9, p-value = 0.003791; RM lower on S15 than S14


# --- Re-learning phase ---
RM_R <- lmer(
  RMwSR ~ Treatment * Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R")
)
# no singularity, keep Pair
summary(RM_R)
# no significant effect of the interaction (P = 0.25) -> refit without it
RM_R_2 <- lmer(
  RMwSR ~ Treatment + Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R")
)
# singular fit -> drop Pair
RM_R_3 <- lmer(
  RMwSR ~ Treatment + Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R")
)

summary(RM_R_3)
# R calves had higher RM (t = 2.5, P = 0.022) and RM increased with session (t = 2.78, P = 0.0065)


#### PLAY BEHAVIOUR ####

# Histogram of each play measure first, to check normality.
# These previously read from `holeboard_data_dropna`, an object this script
# never creates; hist() discards non-finite values itself, so the master
# dataset can be used directly (as the other normality checks in this script do).
hist(holeboard_masterdata$Total_freq, main = "Histogram point play")
hist(holeboard_masterdata$Run, main = "Histogram of Run play")
# both not normal, log transform

### play per minute (time spent running and no. of point play)
# express raw play relative to trial duration

# trial duration in minutes (Trial_duration divided by 60)
holeboard_masterdata$Trial_duration_min <- holeboard_masterdata$Trial_duration / 60

holeboard_masterdata$Runpermin <- holeboard_masterdata$Run / holeboard_masterdata$Trial_duration_min
holeboard_masterdata$Total_freqpermin <- holeboard_masterdata$Total_freq / holeboard_masterdata$Trial_duration_min


# then log-transform; +1 keeps zero values finite
holeboard_masterdata$Log_Run_permin <- log(holeboard_masterdata$Runpermin + 1)
holeboard_masterdata$Log_Total_freq_permin <- log(holeboard_masterdata$Total_freqpermin + 1)


## Time spent running per minute (log-transformed)
# --- initial learning ---
run_min_I <- lmer(
  Log_Run_permin ~ Treatment + Session + Treatment * Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)
summary(run_min_I)
# interaction ns -> refit without it
run_min_I_2 <- lmer(
  Log_Run_permin ~ Treatment + Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)
summary(run_min_I_2)


# --- bucket location change (sessions 14 and 15) ---
run_min_LC <- lmer(
  Log_Run_permin ~ Treatment + Session + Treatment * Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15))
)
# singular fit -> drop Pair (the object is overwritten with the refit)
run_min_LC <- lmer(
  Log_Run_permin ~ Treatment + Session + Treatment * Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15))
)
summary(run_min_LC)
# interaction ns -> refit without it
run_min_LC_2 <- lmer(
  Log_Run_permin ~ Treatment + Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15))
)
summary(run_min_LC_2)


# --- re-learning ---
run_min_R <- lmer(
  Log_Run_permin ~ Treatment + Session + Treatment * Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R")
)
# singular fit -> drop Pair (the object is overwritten with the refit)
run_min_R <- lmer(
  Log_Run_permin ~ Treatment + Session + Treatment * Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R")
)
summary(run_min_R)
# no treatment effect; play increased with trial (t = 2.12 P = 0.026), R calves' play decreased more with trial (t = -2.01, P = 0.047)


## Point play per minute (log-transformed)
# Each model object below is overwritten by its successive refits.
# --- initial learning ---
pointplay_min_I <- lmer(
  Log_Total_freq_permin ~ Treatment + Session + Treatment * Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)
summary(pointplay_min_I)
# interaction ns -> refit without it
pointplay_min_I <- lmer(
  Log_Total_freq_permin ~ Treatment + Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)
summary(pointplay_min_I)
# R calves played less (t = -3.07, P = 0.013), no effect of session P = 0.92

# --- baited location change (sessions 14 and 15) ---
pointplay_min_LC <- lmer(
  Log_Total_freq_permin ~ Treatment + Session + Treatment * Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15))
)
# convergence problem -> drop Pair
pointplay_min_LC <- lmer(
  Log_Total_freq_permin ~ Treatment + Session + Treatment * Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15))
)
summary(pointplay_min_LC)
# interaction ns -> refit without it
pointplay_min_LC <- lmer(
  Log_Total_freq_permin ~ Treatment + Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15))
)
summary(pointplay_min_LC)

# --- re-learning ---
pointplay_min_R <- lmer(
  Log_Total_freq_permin ~ Treatment + Session + Treatment * Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R")
)
# singular fit -> drop Pair
pointplay_min_R <- lmer(
  Log_Total_freq_permin ~ Treatment + Session + Treatment * Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R")
)
summary(pointplay_min_R)
# interaction ns -> refit without it
pointplay_min_R <- lmer(
  Log_Total_freq_permin ~ Treatment + Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R")
)
summary(pointplay_min_R)



#### OTHER MEASURES OF PERFORMANCE ####

## Visual normality check (histograms)
hist(holeboard_masterdata$Visits, main = "Histogram bucket visits")
# not normal - but not that skewed?
hist(holeboard_masterdata$Voc, main = "Histogram of vocs")
# not normal
hist(holeboard_masterdata$Latency, main = "Histogram of latency")
# not normal
hist(holeboard_masterdata$Trial_duration, main = "Histogram of trial duration")
# not normal
hist(holeboard_masterdata$Success_prop, main = "histogram proportion rewards found")

# Log-transform the skewed measures; +1 keeps zero values finite
holeboard_masterdata <- holeboard_masterdata %>%
  mutate(
    Log_Visits = log(Visits + 1),
    Log_Voc = log(Voc + 1),
    Log_Latency = log(Latency + 1),
    Log_Trial_duration = log(Trial_duration + 1)
  )
view(holeboard_masterdata)


## Vocalisations (log-transformed)
# --- initial learning ---
vocs_I <- lmer(
  Log_Voc ~ Treatment + Session + Treatment * Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)
# singular fit -> drop Pair
vocs_I_2 <- lmer(
  Log_Voc ~ Treatment + Session + Treatment * Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)
summary(vocs_I_2)
# interaction ns -> refit without it
# (the formula previously listed Treatment twice; the duplicate term was
# redundant and has been removed - the fitted model is unchanged)
vocs_I_3 <- lmer(
  Log_Voc ~ Treatment + Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)
summary(vocs_I_3)
# no treatment or session effect (p > 0.05)

# --- bucket location change (sessions 14 and 15) ---
vocs_LC <- lmer(
  Log_Voc ~ Treatment + Session + Treatment * Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15))
)
summary(vocs_LC)
# R calves vocalised less than E calves (t = -2.76, p = 0.0126), session had no effect (t = -2.764, p = 0.594), R calves increased vocs more btwn session 14/15 (t = 2.843, p = 0.0108)
# NOTE(review): the session t value above repeats the treatment t value and
# does not match p = 0.594 - re-check against the model summary.

# --- re-learning ---
vocs_R <- lmer(
  Log_Voc ~ Treatment + Session + Treatment * Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R")
)
summary(vocs_R)
# interaction n.s. -> refit without it
vocs_R_2 <- lmer(
  Log_Voc ~ Treatment + Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R")
)
summary(vocs_R_2)
# no effect of treatment (p = 0.29), vocs decreased by session (t = -2.267, p = 0.0256)


## Latency to first bucket (log-transformed)

# Paired t-test of `response` between sessions 14 and 15 within one treatment.
# Observations are paired explicitly by Calf_ID: rows are sorted by calf and
# the calf IDs of the two sessions must match, otherwise the function stops
# instead of silently pairing values from different calves (which plain
# logical indexing would do whenever the row order differs between sessions).
paired_14_vs_15 <- function(data, response, treatment, alternative) {
  keep <- data$Treatment %in% treatment & data$Session %in% c(14, 15)
  d <- data[keep, , drop = FALSE]
  d <- d[order(d$Calf_ID), , drop = FALSE]
  s14 <- d[d$Session == 14, , drop = FALSE]
  s15 <- d[d$Session == 15, , drop = FALSE]
  stopifnot(identical(as.character(s14$Calf_ID), as.character(s15$Calf_ID)))
  t.test(
    x = s14[[response]],
    y = s15[[response]],
    paired = TRUE,
    alternative = alternative
  )
}

# --- initial learning ---
lat_I <- lmer(
  Log_Latency ~ Treatment + Session + Treatment * Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)
summary(lat_I)
# no effect of treatment (p = 0.49), session (p = 0.51), but R calves' latency decreased more by session than E calves (t = -2.028, p = 0.0436)

# --- bucket location change (sessions 14 and 15) ---
lat_LC <- lmer(
  Log_Latency ~ Treatment + Session + Treatment * Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15))
)
# singular fit -> drop Pair
lat_LC <- lmer(
  Log_Latency ~ Treatment + Session + Treatment * Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15))
)
# still singular -> one-sided paired t-tests (session 14 < session 15) instead
paired_14_vs_15(holeboard_masterdata, "Log_Latency", treatment = "R", alternative = "less")
# recorded result: t = 1.0332, df = 9, p = 0.836; no difference in latency btwn 14/15

paired_14_vs_15(holeboard_masterdata, "Log_Latency", treatment = "E", alternative = "less")
# recorded result: t = 0.017, df = 9, p = 0.51; ""

# --- re-learning ---
lat_R <- lmer(
  Log_Latency ~ Treatment + Session + Treatment * Session + (1 | Pair / Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R")
)
# singular fit -> drop Pair
lat_R <- lmer(
  Log_Latency ~ Treatment + Session + Treatment * Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R")
)
summary(lat_R)
# interaction ns -> refit without it
lat_R <- lmer(
  Log_Latency ~ Treatment + Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R")
)
summary(lat_R)
# R calves had lower latency (t = -2.94, p = 0.0088); latency increased with session (t = 2.97, p = 0.0037)


## trial duration
# initial learning
trial_I<-lmer(Log_Trial_duration~Treatment + Session + Treatment*Session + (1|Pair/Calf_ID), subset(holeboard_masterdata, Phase=="L")) 
# singular, remove pair
trial_I<-lmer(Log_Trial_duration~Treatment + Session + Treatment*Session + (1|Calf_ID), subset(holeboard_masterdata, Phase=="L")) 
summary(trial_I)
# no effect of treatment (p = 0.789), trial duration decreased by session (t = -3.839, p < 0.001), R calves' duration decreased more per session than E (t = -4.71, p < 0.001)

# Bucket location change (sessions 14 and 15): trial duration
# Step 1: calf nested within pair as random effect.
trial_LC <- lmer(
  formula = Log_Trial_duration ~ Treatment + Session + Treatment*Session + (1 | Pair/Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15))
)
# Outcome: singular fit, so the Pair level is dropped.

# Step 2: random intercept for calf only.
trial_LC <- lmer(
  formula = Log_Trial_duration ~ Treatment + Session + Treatment*Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15))
)
summary(trial_LC)
# Outcome: R calves had lower trial duration than E (t = -2.151, p = 0.0452);
# duration tended to increase between the two sessions (t = 1.92, p = 0.071),
# and tended to increase more between sessions for R calves
# (t = 2.027, p = 0.0577)

# Re-learning phase (Phase == "R"): trial duration
# Calf nested within pair is kept as the random effect for this model.
trial_R <- lmer(
  formula = Log_Trial_duration ~ Treatment + Session + Treatment*Session + (1 | Pair/Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R")
)
summary(trial_R)
# Outcome: no effect of treatment (p = 0.126) or session (p = 0.355), but
# R calves' duration decreased by session more than E (t = -2.23, p = 0.028)


## Proportion of baited buckets found
# Not log-transformed (log-transformation is only used for the continuous
# measures). Analysed with clmm: an ordinal logistic regression that accepts
# random effects, NOT an LMM.

# The response has to be an ordered factor.
holeboard_masterdata[["Success"]] <- ordered(holeboard_masterdata[["Success"]])
# NOTE(review): subset_holeboard_14_15 is not created in this section;
# presumably it is defined earlier in the script - verify.
subset_holeboard_14_15[["Session"]] <- factor(subset_holeboard_14_15[["Session"]])

# Grouping variables for the random effects are stored as factors.
holeboard_masterdata[["Calf_ID"]] <- as.factor(holeboard_masterdata[["Calf_ID"]])
holeboard_masterdata[["Pair"]] <- as.factor(holeboard_masterdata[["Pair"]])

# Initial learning phase (Phase == "L"): success
# Cumulative link mixed model, calf nested within pair as random effect.
success_I <- clmm(
  formula = Success ~ Treatment + Session + Treatment*Session + (1 | Pair/Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)
summary(success_I)
# Outcome (clmm reports Wald z statistics): no effect of treatment
# (statistic not recorded, p = 0.747); success increased by session
# (z = 2.27, p = 0.0232); R calves' success increased more by session than E
# (z = 4.061, p < 0.001)


# Bucket location change (sessions 14 and 15): success
# Step 1: calf nested within pair as random effect.
success_LC <- clmm(
  formula = Success ~ Treatment + Session + Treatment*Session + (1 | Pair/Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15))
)
# Outcome: singular / absolute convergence issue, so Pair is dropped.

# Step 2: random intercept for calf only.
success_LC <- clmm(
  formula = Success ~ Treatment + Session + Treatment*Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15))
)
# Outcome: singular again, so the interaction is dropped.

# Step 3: main-effects model.
success_LC_2 <- clmm(
  formula = Success ~ Treatment + Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Session %in% c(14, 15))
)
summary(success_LC_2)
# Outcome: R calves had greater success than E (z = 3.4, p < 0.001), and
# success dropped for all calves on session 15 (z = -2.24, p = 0.0246)

# Re-learning phase (Phase == "R"): success
# Cumulative link mixed model, calf nested within pair as random effect.
success_R <- clmm(
  formula = Success ~ Treatment + Session + Treatment*Session + (1 | Pair/Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R")
)
summary(success_R)
# Outcome: no effect of treatment (p = 0.19) or session (p = 0.48), but
# R calves found more baited buckets with each trial compared to E
# (z = 2.16, p = 0.031)

## Number of visits to bucket

# Learning phase (Phase == "L")
# Step 1: full model; the nested random effect (calf within pair) fits fine.
visits_L_2 <- lmer(
  formula = Log_Visits ~ Treatment + Session + Treatment*Session + (1 | Pair/Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)
summary(visits_L_2)
# Outcome: interaction not significant (P = 0.31), so it is removed.

# Step 2: main-effects model.
visits_L_3 <- lmer(
  formula = Log_Visits ~ Treatment + Session + (1 | Pair/Calf_ID),
  data = subset(holeboard_masterdata, Phase == "L")
)
summary(visits_L_3)
# Outcome: visits decreased by session (t = -3.95, P < 0.001);
# no effect of treatment (P = 0.53)

# Bucket location change (sessions 14 and 15): number of visits

visits_LC <- lmer(Log_Visits~Treatment + Session + Treatment*Session + (1|Pair/Calf_ID), subset(holeboard_masterdata, Session %in% c(14,15)))
# singular, remove Pair
visits_LC_2 <- lmer(Log_Visits~Treatment + Session + Treatment*Session + (1|Calf_ID), subset(holeboard_masterdata, Session %in% c(14,15)))
# singular, run t-test

# Paired t-tests (session 14 vs session 15) within each treatment.
# The rows are sorted by session and calf so that element i of x and element i
# of y belong to the same calf; the pairing no longer depends on the row order
# of the master data. subset() also drops rows where Session/Treatment is NA,
# which logical indexing would otherwise turn into NA elements.
visits_LC_pairs <- subset(holeboard_masterdata, Session %in% c(14, 15))
visits_LC_pairs <- visits_LC_pairs[order(visits_LC_pairs$Session, visits_LC_pairs$Calf_ID), ]

# Restricted (R) calves
visits_LC_R_s14 <- subset(visits_LC_pairs, Treatment == "R" & Session == 14)
visits_LC_R_s15 <- subset(visits_LC_pairs, Treatment == "R" & Session == 15)
# Fail loudly if the two sessions do not contain the same calves.
stopifnot(identical(
  as.character(visits_LC_R_s14$Calf_ID),
  as.character(visits_LC_R_s15$Calf_ID)
))
# alternative = "less": H1 is that session-14 visits are lower than session-15.
t.test(
  x = visits_LC_R_s14$Log_Visits,
  y = visits_LC_R_s15$Log_Visits,
  paired = TRUE, alternative = "less"
)
# recorded result: t = -5.0765, df = 9, p-value = 0.000333; visits increased after bucket location change

# Enhanced (E) calves
visits_LC_E_s14 <- subset(visits_LC_pairs, Treatment == "E" & Session == 14)
visits_LC_E_s15 <- subset(visits_LC_pairs, Treatment == "E" & Session == 15)
stopifnot(identical(
  as.character(visits_LC_E_s14$Calf_ID),
  as.character(visits_LC_E_s15$Calf_ID)
))
t.test(
  x = visits_LC_E_s14$Log_Visits,
  y = visits_LC_E_s15$Log_Visits,
  paired = TRUE, alternative = "less"
)
# recorded result: t = -1.6883, df = 9, p-value = 0.06282; visits tended to
# increase after location change (not significant at the 0.05 level)

# Re-learning phase (Phase == "R"): number of visits

# Step 1: calf nested within pair as random effect.
visits_R_2 <- lmer(
  formula = Log_Visits ~ Treatment + Session + Treatment*Session + (1 | Pair/Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R")
)
# Outcome: singular fit, so the Pair level is dropped.

# Step 2: random intercept for calf only.
visits_R_2 <- lmer(
  formula = Log_Visits ~ Treatment + Session + Treatment*Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R")
)
summary(visits_R_2)
# Outcome: interaction not significant, so it is removed.

# Step 3: main-effects model.
visits_R_3 <- lmer(
  formula = Log_Visits ~ Treatment + Session + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Phase == "R")
)
summary(visits_R_3)
# Outcome: R calves visited more buckets than E (t = 2.3, P = 0.033), and
# visits decreased by session (t = -4.54, P < 0.001)


#### MILK INTAKE AND PLAY ####

## Effect of the milk drunk on the previous day on play, Enhanced group only
# (the R group model won't run due to lack of variation or observations in
# this group)

# Running: random intercept per calf.
run_milk <- lmer(
  formula = Log_Run ~ Milk_prev + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Treatment == "E")
)
summary(run_milk)
# Outcome: no effect of milk intake from the previous day on the run time
# (P = 0.17)

# Total play frequency: random intercept per calf.
play_milk <- lmer(
  formula = Log_Total_freq ~ Milk_prev + (1 | Calf_ID),
  data = subset(holeboard_masterdata, Treatment == "E")
)
summary(play_milk)
# Outcome: play increased when milk drunk the previous day was higher
# (LMM: t = 2.13, P = 0.034)


### Quick check of data means

# Mean and standard error of Runpermin for every Session x Treatment cell.
# The SE divides by the square root of the number of non-missing values.
summarise(
  group_by(holeboard_masterdata, Session, Treatment),
  mean = mean(Runpermin, na.rm = TRUE),
  se = sd(Runpermin, na.rm = TRUE) / sqrt(length(na.omit(Runpermin))),
  .groups = "drop"
)
